import json
from db_helper.milvus_database_helper import MilvusDatabase
from sentence_transformers import SentenceTransformer


# NOTE(review): the Milvus endpoint and credentials are hard-coded in source;
# consider loading them from the environment or a secrets store instead.
# dim presumably has to equal the embedding size of the model loaded below — TODO confirm.
db = MilvusDatabase(dim=512, host="117.78.5.132", port="19530", user="root", password="Milvus@aI", database="default")
# Sentence encoder used to embed every content line; the model path is given
# relative to the current working directory.
sentence_transformer = SentenceTransformer(
    model_name_or_path='../sentence-transformers/distiluse-base-multilingual-cased-v1',
    tokenizer_kwargs={"clean_up_tokenization_spaces": False})
# Load the pre-extracted sections. The loop below reads a 'title' and an
# iterable 'content' from every entry.
with open("../pdf/钢质海船.JSON", 'r', encoding='utf-8') as f:
    data = json.load(f)

# The file handle is only needed for parsing, so the embedding and insert work
# runs after the `with` block has closed it.
pdf_name = '钢质海船入级规范2022.pdf'
for item in data:
    title = item['title']
    # Prefix each line with its section title so every embedded text carries
    # its heading.
    contents = [f'{title} {inner}' for inner in item['content']]
    if not contents:
        # Nothing to embed for this section; skip it so no empty batch is
        # encoded or sent to the database.
        continue
    embeddings = sentence_transformer.encode(contents)
    # NOTE(review): the original code set `i = 3` before the loop, but the
    # comprehension's own `i` shadowed it, so "page" and "content_code" were
    # always built from the per-section line index. That behaviour is kept
    # here; confirm whether "page" was meant to hold a real page number.
    data_list = [
        {
            "file_name": pdf_name,
            "page": line_idx,
            "content_code": f'{title}_line_{line_idx}',
            "embeddings": embedding,
            "content": content,
        }
        for line_idx, (embedding, content) in enumerate(zip(embeddings, contents))
    ]
    # One insert per section keeps each batch limited to that section's lines.
    db.insert_data(entities=data_list)

